###################################################
## Demographic Analysis
## SCRIPT 5: Script to analyze the mergesplit runs
## created in script 4.
##
## Creates precincts_vs_districts_complete.pdf for memo.
## 5/17/21
###################################################

library(ppmf)
library(geomander)
library(tidyverse)
library(sf)
library(redist)
library(ggridges)
library(patchwork)

la_all <- readRDS("../../data/LA/la.Rds") %>%
  rename(sld_upper = sld_up,
         sld_lower = sld_low) %>%
  filter(sld_lower != 106)

la_map <- la_all %>%
  st_set_crs(26915) %>% 
  st_transform(crs = 4269) %>%
  redist_map(
    total_pop = pop,
    pop_tol = 0.05,
    existing_plan = sld_lower
  )

district_focus <- c(29, 61, 63:71, 101, 18, 62, 60)
ebr_map <- la_map %>% filter(sld_lower %in% district_focus)
ebr_map_ng <- ebr_map %>% st_drop_geometry()

## vector from script 5 of VRA strengths
strength_vals <- c(0, 1, 10, 100, 500, 1000, 10000, 25000, 50000)

## result containers
final_vals <- NULL
district_vals <- NULL
district_vals4 <- NULL

## helper function: is each precinct in an MMD?
is_mmd <- function(my_dist_vec) {
  ebr_map %>%
    mutate(dist_vec = my_dist_vec) %>%
    st_drop_geometry() %>%
    group_by(dist_vec) %>%
    mutate(black_pct = sum(pop_black) / sum(pop),
           maj_min = black_pct >= 0.5) %>%
    pull(black_pct)
}

for (i in 1:length(strength_vals)) {
  cat(i, "\n")
  
  path_census <- paste("../../data/LA/sim/vra_test/ms_", strength_vals[i], ".rds", sep = "")
  path_4 <-      paste("../../data/LA/sim/vra_test/ms4_", strength_vals[i], ".rds", sep = "")
  path_12 <-      paste("../../data/LA/sim/vra_test/ms12_", strength_vals[i], ".rds", sep = "")
  path_19 <-      paste("../../data/LA/sim/vra_test/ms19_", strength_vals[i], ".rds", sep = "")
  
  plans_census <- readRDS(path_census)
  plans_4 <- readRDS(path_4)
  plans_12 <- readRDS(path_12)
  plans_19 <- readRDS(path_19)
  
  m1 <- get_plans_matrix(plans_census)
  m4 <- get_plans_matrix(plans_4)
  m12 <- get_plans_matrix(plans_12)
  m19 <- get_plans_matrix(plans_19)
  
  ## shorten plans, we don't have all day
  sequence <- seq(0, ncol(m1), by = 10)[-1]
  
  m1 <- m1[,sequence]
  m4 <- m4[,sequence]
  m12 <- m12[,sequence]
  m19 <- m19[,sequence]
  
  cat(i, ": District Sum \n")
  
  district_vals_applied <- apply(m1, 2, function(z) {
    resum <- ebr_map_ng %>%
      mutate(ndist = z) %>%
      group_by(ndist) %>%
      summarise(m1_mm = sum(pop_black) / sum(pop),
                m4_mm = sum(v4_pop_black) / sum(v4_pop),
                mm_diff = m1_mm - m4_mm,
                strength = strength_vals[i], .groups = "drop") %>% as_tibble()
  })
  
  district_vals_applied <- do.call(bind_rows, district_vals_applied)
  district_vals <- bind_rows(district_vals, district_vals_applied)
  
  cat(i, ": Precinct Sum \n")
  
  ## Under each plan, is precinct x in an MMD?
  pct_real <- tibble(n = 1:length(sequence)) %>%
    mutate(black_pct = map(n, ~ is_mmd(m1[,.])))
  
  pct_4 <- tibble(n = 1:length(sequence)) %>%
    mutate(black_pct4 = map(n, ~ is_mmd(m4[,.])))
  
  pct_12 <- tibble(n = 1:length(sequence)) %>%
    mutate(black_pct12 = map(n, ~ is_mmd(m12[,.])))
  
  pct_19 <- tibble(n = 1:length(sequence)) %>%
    mutate(black_pct19 = map(n, ~ is_mmd(m19[,.])))

  n_precinct <- nrow(ebr_map)
  
  ## let's unnest them, easier to work with
  pct_real <- pct_real %>% select(black_pct) %>% unnest() %>%
    mutate(n = rep(1:n_precinct, length(sequence)), 
           draw = rep(1:length(sequence), each = n_precinct))
  pct_4 <- pct_4 %>% select(black_pct4) %>% unnest() %>%
    mutate(n = rep(1:n_precinct, length(sequence)), 
           draw = rep(1:length(sequence), each = n_precinct))
  pct_12 <- pct_12 %>% select(black_pct12) %>% unnest() %>%
    mutate(n = rep(1:n_precinct, length(sequence)), 
           draw = rep(1:length(sequence), each = n_precinct))
  pct_19 <- pct_19 %>% select(black_pct19) %>% unnest() %>%
    mutate(n = rep(1:n_precinct, length(sequence)),
           draw = rep(1:length(sequence), each = n_precinct))
  
  med_real <- rep(NA, n_precinct)
  med_4 <- rep(NA, n_precinct)
  med_12 <- rep(NA, n_precinct)
  med_19 <- rep(NA, n_precinct)
  
  for (j in 1:n_precinct) {
    if (j %% 10 == 0)
      cat(j, "\n")
    med_real[j] <- pct_real %>% filter(n == j) %>% 
      mutate(m = black_pct > 0.5) %>% 
      summarize(m = mean(m)) %>% pull(m)
    med_4[j] <- pct_4 %>% filter(n == j) %>%
      mutate(m = black_pct4 > 0.5) %>%
      summarize(m = mean(m)) %>% pull(m)
    med_12[j] <- pct_12 %>% filter(n == j) %>%
      mutate(m = black_pct12 > 0.5) %>%
      summarize(m = mean(m)) %>% pull(m)
    med_19[j] <- pct_19 %>% filter(n == j) %>%
      mutate(m = black_pct19 > 0.5) %>%
      summarize(m = mean(m)) %>% pull(m)
  }
  
  res <- tibble(strength = strength_vals[i],
                med_real = med_real,
                med_4 = med_4,
                med_12 = med_12,
                med_diff = med_real - med_4,
                med_diff_12 = med_real - med_12,
                med_diff_19 = med_real - med_19)
  
  final_vals <- bind_rows(final_vals, res)
}

# for theme_ppmf()
source("../00_custom_functions.R")

## Census vs DAS-4
p <- ggplot(final_vals, 
            aes(x = med_diff, 
                y = as.factor(strength), 
                group = as.factor(strength))) + 
  geom_density_ridges_gradient(scale = 3, 
                               rel_min_height = 0.01,
                               bandwidth = 0.01, fill = "#ACACAC") +
  theme_bw() + 
  labs(x = "Difference in Precinct MMD Probability",
       y = "Strength of VRA Constraint",
       title = "Census vs. DAS-4.5") + 
  scale_y_discrete(expand = expansion(mult = c(0.1,0.3))) + 
  theme(legend.position = "none",
        plot.title = element_text(size = 13, hjust = 0.5, face = "bold")) +
  theme_ppmf() + 
  theme(plot.title = element_text(hjust = 0.5))

## Census vs DAS-12
p12 <- ggplot(final_vals, 
            aes(x = med_diff_12, 
                y = as.factor(strength), 
                group = as.factor(strength))) + 
  geom_density_ridges_gradient(scale = 3, 
                               rel_min_height = 0.01,
                               bandwidth = 0.01, fill = "#ACACAC") +
  # scale_fill_viridis_c(option = "cividis") +
  theme_bw() + 
  labs(x = "Difference in Precinct MMD Probability",
       y = "Strength of VRA Constraint",
       title = "Census vs. DAS-12.2") + 
  scale_y_discrete(expand = expansion(mult = c(0.1,0.3))) + 
  theme(legend.position = "none",
        plot.title = element_text(size = 13, hjust = 0.5, face = "bold")) + 
  theme_ppmf() + 
  theme(plot.title = element_text(hjust = 0.5))

p_complete <- p12 + p
ggsave(plot = p_complete, "../../figs/precincts_mmd_prob.pdf", 
       height = 4, width = 8)

p_19 <- ggplot(final_vals,
              aes(x = med_diff_19,
                  y = as.factor(strength),
                  group = as.factor(strength))) +
  geom_density_ridges_gradient(scale = 3,
                               rel_min_height = 0.01,
                               bandwidth = 0.01, fill = "#ACACAC") +
  # scale_fill_viridis_c(option = "cividis") +
  theme_bw() +
  labs(x = "Difference in Precinct MMD Probability",
       y = "Strength of VRA Constraint",
       title = "Census vs. DAS-19.61") +
  scale_y_discrete(expand = expansion(mult = c(0.1,0.3))) +
  theme(legend.position = "none",
        plot.title = element_text(size = 13, hjust = 0.5, face = "bold")) +
  theme_ppmf() +
  theme(plot.title = element_text(hjust = 0.5))

ggsave(plot = p_19, "../../figs/precincts_mmd_prob_post.pdf",
       height = 4, width = 8)

## initial version of paper had district-level results too, now described elsewhere
# ## Read non-VRA simulations for district-level comparisons.
# plans_census <- readRDS("../../data/LA/sim/mmd_test/novra500k2_census_0.05.rds")
# plans_4 <- readRDS("../../data/LA/sim/mmd_test/novra500k2_d4_0.05.rds")
# plans_12 <- readRDS("../../data/LA/sim/mmd_test/novra500k2_d12_0.05.rds")
# 
# ## for summarize_plans()
# source("tyler_utils.R")
# 
# plans_c_sum <- summarize_plans(plans = plans_census,
#                                map = ebr_map,
#                                county_vec = ebr_map$COUNTYFP10,
#                                min_vector = ebr_map$pop_black,
#                                tot_vector = ebr_map$pop)
# 
# plans_4_sum <- summarize_plans(plans = plans_4,
#                                map = ebr_map,
#                                county_vec = ebr_map$COUNTYFP10,
#                                min_vector = ebr_map$v4_pop_black,
#                                tot_vector = ebr_map$v4_pop)
# 
# plans_12_sum <- summarize_plans(plans = plans_12,
#                                 map = ebr_map,
#                                 county_vec = ebr_map$COUNTYFP10,
#                                 min_vector = ebr_map$v12_pop_black,
#                                 tot_vector = ebr_map$v12_pop)
# 
# ## MMD subplot
# res_line <- plans_c_sum$sum_plans$maj_min[1] # reference line
# 
# p_mms <- bind_rows(plans_c_sum$sum_plans, 
#           plans_4_sum$sum_plans,
#           plans_12_sum$sum_plans, .id = "type") %>%
#   mutate(type = case_when(type == "1" ~ "Census",
#                           type == "2" ~ "DAS4",
#                           type == "3" ~ "DAS12")) %>%
#   ggplot(aes(x = maj_min, fill = type)) + 
#   geom_bar(aes(y = ..count../sum(..count..)),
#                  color = "#001021", position = "dodge") + 
#   geom_vline(xintercept = res_line,
#              lty = "dashed", col = "#EA7317") + theme_bw() + 
#   scale_y_continuous("Fraction of plans", 
#                      breaks = c(0, 0.2),
#                      labels= c("0%", "20%")) +
#   scale_fill_manual(name = "Data Source", 
#                     values = c("#9FB6DA", "#759C44", "#29483A")) + 
#   labs(x = NULL, title = "Majority Minority Districts") + 
#   theme_ppmf() +
#   theme(plot.title = element_text(hjust = 0.5)) + 
#   xlim(c(1,8)) +
#   guides(fill = FALSE) 
# 
# ## County split subplot
# res_line <- plans_c_sum$splits[1] # reference line
# 
# p_splits <- bind_rows(tibble(splits = plans_c_sum$splits, 
#                              type = "Census"),
#                       tibble(splits = plans_4_sum$splits, 
#                              type = "DAS-4.5"),
#                       tibble(splits = plans_12_sum$splits, 
#                              type = "DAS-12.2")) %>%
#   ggplot(aes(x = splits, fill = type)) + 
#   geom_bar(aes(y = ..count../sum(..count..)),
#            color = "#001021", position = "dodge") + 
#   geom_vline(xintercept = res_line,
#              lty = "dashed", col = "#EA7317") + theme_bw() + 
#   scale_y_continuous("Fraction of plans", 
#                      breaks = c(0, 0.5, 0.10, 0.15),
#                      labels= c("0%", "5%", "10%", "15%")) + 
#   scale_fill_manual(name = NULL, 
#                     values = c("#9FB6DA", "#759C44", "#29483A")) +
#   labs(x = NULL, y = "Percentage of Districts",
#        title = "County Splits") + 
#   theme_ppmf() + 
#   theme(plot.title = element_text(hjust = 0.5)) + 
#   theme(legend.position = "bottom") +
#   theme(legend.key.size = unit(0.15, 'cm'))
# 
# ## Compactness subplot
# psm1 <- plans_c_sum$small_plans %>%
#   mutate(comp = distr_compactness(ebr_map, "FracKept",
#                                   perim_df=redist.prep.polsbypopper(ebr_map, ebr_map$adj)))
# psm4 <- plans_4_sum$small_plans %>%
#   mutate(comp = distr_compactness(ebr_map, "FracKept",
#                                   perim_df=redist.prep.polsbypopper(ebr_map, ebr_map$adj)))
# psm12 <- plans_12_sum$small_plans %>%
#   mutate(comp = distr_compactness(ebr_map, "FracKept",
#                                   perim_df=redist.prep.polsbypopper(ebr_map, ebr_map$adj)))
# 
# ref_line <- psm1$comp[1] # reference line
# 
# p_comp <- bind_rows(psm1, psm4, psm12, .id = "type") %>%
#   mutate(type = case_when(type == "1" ~ "Census",
#                           type == "2" ~ "DAS4",
#                           type == "3" ~ "DAS12")) %>%
#   ggplot(aes(x = comp, fill = type)) + 
#   geom_density(aes(y = ..scaled..), alpha = 0.8) + 
#   geom_vline(xintercept = ref_line,
#              lty = "dashed", col = "#EA7317") + 
#   scale_fill_manual(name = "Data Source", 
#                     values = c("#9FB6DA", "#759C44", "#29483A")) +
#   guides(fill = guide_legend(override.aes = list(alpha = 1))) +
#   theme_bw() + 
#   labs(x = NULL, y = "Density") + 
#   labs(title = "Fraction of Edges Kept") + 
#   guides(fill = FALSE)+ 
#   theme_ppmf() + 
#   theme(plot.title = element_text(hjust = 0.5))
# 
# ## Put them all together
# layout <- "
# AAABBEEE
# AAACCEEE
# AAADDEEE
# "
# 
# p_complete <- p12 + p_mms + p_comp + p_splits + p + 
#   plot_layout(design = layout)
# ggsave(plot = p_complete, "../../figs/precincts_vs_districts_complete_final.pdf", 
#        height = 4, width = 8)
